library(cgdsr)
library(dplyr)
library(dbplyr)
# cgds <- cgdsr::CGDS("http://www.cbioportal.org/public-portal/")
# clinicalData <- cgdsr::getClinicalData(cgds, "gbm_tcga_pub_all")
start_time <- Sys.time()
clinicalData_tbl <- spark_read_table(sc, 'clinicaldata')
clinicalData_trans <-
clinicalData_tbl %>%
mutate(OS_STATUS = regexp_replace(OS_STATUS, "LIVING", "0")) %>%
mutate(OS_STATUS = regexp_replace(OS_STATUS, "DECEASED", "1")) %>%
mutate(DFS_STATUS = regexp_replace(DFS_STATUS, "^$|^ $", "DiseaseFree")) %>%
filter(!is.na(OS_STATUS)) %>%
mutate(OS_STATUS = as.numeric(OS_STATUS)) %>%
arrange(is.na(OS_MONTHS), OS_MONTHS) %>% ## OUFFF put Nan at the end of the column
mutate(DiseaseFree = ifelse(DFS_STATUS == "DiseaseFree", 1, 0)) %>%
mutate( n_DiseaseFree = cumsum(as.numeric(DiseaseFree == 1 ))) %>%
mutate( n_Recurred = cumsum(as.numeric(DiseaseFree == 0 )))
sql_render(clinicalData_trans)
<SQL> SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`, `DiseaseFree`, `n_DiseaseFree`, sum(CAST(`DiseaseFree` = 0.0 AS DOUBLE)) OVER (ORDER BY ((`OS_MONTHS`) IS NULL), `OS_MONTHS` ROWS UNBOUNDED PRECEDING) AS `n_Recurred`
FROM (SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`, `DiseaseFree`, sum(CAST(`DiseaseFree` = 1.0 AS DOUBLE)) OVER (ORDER BY ((`OS_MONTHS`) IS NULL), `OS_MONTHS` ROWS UNBOUNDED PRECEDING) AS `n_DiseaseFree`
FROM (SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`, CASE WHEN (`DFS_STATUS` = "DiseaseFree") THEN (1.0) WHEN NOT(`DFS_STATUS` = "DiseaseFree") THEN (0.0) END AS `DiseaseFree`
FROM (SELECT *
FROM (SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, CAST(`OS_STATUS` AS DOUBLE) AS `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`
FROM (SELECT *
FROM (SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, REGEXP_REPLACE(`DFS_STATUS`, "^$|^ $", "DiseaseFree") AS `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`
FROM (SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, REGEXP_REPLACE(`OS_STATUS`, "DECEASED", "1") AS `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`
FROM (SELECT `ACGH_DATA`, `CANCER_TYPE`, `CANCER_TYPE_DETAILED`, `COMPLETE_DATA`, `DFS_MONTHS`, `DFS_STATUS`, `FRACTION_GENOME_ALTERED`, `KARNOFSKY_PERFORMANCE_SCORE`, `MRNA_DATA`, `MUTATION_COUNT`, `ONCOTREE_CODE`, `OS_MONTHS`, REGEXP_REPLACE(`OS_STATUS`, "LIVING", "0") AS `OS_STATUS`, `PRETREATMENT_HISTORY`, `PRIOR_GLIOMA`, `SAMPLE_COUNT`, `SAMPLE_TYPE`, `SEQUENCED`, `SEX`, `TREATMENT_STATUS`
FROM `clinicaldata`) `qzrlxyctvc`) `rtiwddyzok`) `kfzzvpauzi`
WHERE (NOT(((`OS_STATUS`) IS NULL)))) `hkfcvvggjy`) `kztovjhshy`
ORDER BY ((`OS_MONTHS`) IS NULL), `OS_MONTHS`) `pvlbruytmx`) `gltmwdtliu`) `tmbxachzcz`
LS0tCnRpdGxlOiAiIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgoKCmBgYHtyfQpsaWJyYXJ5KGNnZHNyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KGRicGx5cikKCiAgY2dkcyA8LSBjZ2Rzcjo6Q0dEUygiaHR0cDovL3d3dy5jYmlvcG9ydGFsLm9yZy9wdWJsaWMtcG9ydGFsLyIpCiAgCiAgY2xpbmljYWxEYXRhIDwtIGNnZHNyOjpnZXRDbGluaWNhbERhdGEoY2dkcywgImdibV90Y2dhX3B1Yl9hbGwiKQoKICAgIGNsaW5pY2FsRGF0YV90cmFucyA8LQogICAgIGNsaW5pY2FsRGF0YSAlPiUKICAgICAgbXV0YXRlKE9TX1NUQVRVUyA9IHJlZ2V4cF9yZXBsYWNlKE9TX1NUQVRVUywgIkxJVklORyIsICIwIikpICU+JQogICAgICBtdXRhdGUoT1NfU1RBVFVTID0gcmVnZXhwX3JlcGxhY2UoT1NfU1RBVFVTLCAiREVDRUFTRUQiLCAiMSIpKSAlPiUKICAgICAgbXV0YXRlKERGU19TVEFUVVMgPSByZWdleHBfcmVwbGFjZShERlNfU1RBVFVTLCAiXiR8XiAkIiwgIkRpc2Vhc2VGcmVlIikpICU+JQogICAgICBmaWx0ZXIoIWlzLm5hKE9TX1NUQVRVUykpICU+JQogICAgICBtdXRhdGUoT1NfU1RBVFVTID0gYXMubnVtZXJpYyhPU19TVEFUVVMpKSAlPiUKICAgICAgYXJyYW5nZShpcy5uYShPU19NT05USFMpLCBPU19NT05USFMpICU+JSAgIyMgT1VGRkYgcHV0IE5hbiBhdCB0aGUgZW5kIG9mIHRoZSBjb2x1bW4KICAgICAgbXV0YXRlKERpc2Vhc2VGcmVlID0gaWZlbHNlKERGU19TVEFUVVMgPT0gIkRpc2Vhc2VGcmVlIiwgMSwgMCkpICU+JSAKICAgICAgbXV0YXRlKCBuX0Rpc2Vhc2VGcmVlID0gY3Vtc3VtKGFzLm51bWVyaWMoRGlzZWFzZUZyZWUgPT0gMSApKSkgJT4lCiAgICAgIG11dGF0ZSggbl9SZWN1cnJlZCA9IGN1bXN1bShhcy5udW1lcmljKERpc2Vhc2VGcmVlID09IDAgKSkpCiAgCnNxbF9yZW5kZXIoY2xpbmljYWxEYXRhX3RyYW5zKQpgYGAKCgoK